Triglycerides and diglycerides seem to survive FDR control.
# Store all observations of Class 1 (rows labelled E4NO, i.e. no E4 allele) in C1
# NOTE(review): assumes columns 1:230 of ADmets are the metabolite measurements
# and that rows of geno align 1:1 with rows of ADmets — confirm upstream.
C1 <- ADmets[geno$APOEb == "E4NO", 1:230]
# Store all observations of Class 2 (rows labelled E4YES, i.e. E4 carriers) in C2
C2 <- ADmets[geno$APOEb == "E4YES", 1:230]
# Unpaired Wilcoxon rank-sum (Mann-Whitney U) test of the one-sided
# alternative that x is stochastically less than y.
# Returns the raw (unadjusted) p-value.
MannWhitneyU <- function(x, y) {
  stats::wilcox.test(x, y, paired = FALSE, alternative = "less")$p.value
}
# Apply the test to corresponding columns; map2_dbl returns a named
# numeric vector, so no list -> data.frame -> transpose gymnastics needed
p_values <- purrr::map2_dbl(C1, C2, MannWhitneyU)
# Calculate the FDR-adjusted p-values (Benjamini-Hochberg)
p_adj <- p.adjust(p_values, method = "fdr")
results <- data.frame(p_values, p_adj)
# Keep only metabolites significant (a = 0.05) after FDR adjustment
dplyr::filter(results, p_adj < 0.05)
# Carriers of one copy of E4 vs two copies tend to have less histamine,
# fumaric acid, uracil, triglyceride TG.48.0, and phosphatidylcholine
# PC.36.4 (p < 0.05); however these do not survive the FDR cut
# (p_adj = 0.97).
# Recode APOE genotype: one copy of E4 (E2E4 or E3E4) vs two copies (E4E4)
geno$g <- geno$APOE
geno$g[geno$g %in% c("E3E4", "E2E4")] <- "E4x1"
geno$g[geno$g == "E4E4"] <- "E4x2"
# Store all observations of Class 1 in C1
C1 <- ADmets[geno$g == "E4x1", 1:230]
# Store all observations of Class 2 in C2
C2 <- ADmets[geno$g == "E4x2", 1:230]
# Column-wise Mann-Whitney U p-values as a named numeric vector
p_values <- purrr::map2_dbl(C1, C2, MannWhitneyU)
# Calculate the FDR-adjusted p-values (Benjamini-Hochberg)
p_adj <- p.adjust(p_values, method = "fdr")
results <- data.frame(p_values, p_adj)
# Keep only metabolites significant (a = 0.05) after FDR adjustment
dplyr::filter(results, p_adj < 0.05)
# Testing no E4 vs one E4: no metabolites differ significantly.
# Recode APOE genotype: one copy of E4 (E2E4 or E3E4) vs no E4 allele at all
geno$g <- geno$APOE
geno$g[geno$g %in% c("E3E4", "E2E4")] <- "E4x1"
geno$g[geno$APOEb == "E4NO"] <- "E4x0"
# Store all observations of Class 1 in C1
C1 <- ADmets[geno$g == "E4x1", 1:230]
# Store all observations of Class 2 in C2
C2 <- ADmets[geno$g == "E4x0", 1:230]
# Column-wise Mann-Whitney U p-values as a named numeric vector
p_values <- purrr::map2_dbl(C1, C2, MannWhitneyU)
# Calculate the FDR-adjusted p-values (Benjamini-Hochberg)
p_adj <- p.adjust(p_values, method = "fdr")
results <- data.frame(p_values, p_adj)
# Filter out the non-significant (a=0.05) FDR-adjusted p-values
dplyr::filter(results, p_adj < 0.05)
# Train a binary classifier with caret and summarise its performance.
#
# title: column label for the returned metrics table
# X:     data frame of predictors
# y:     binary factor outcome (levels X0/X1 after make.names())
# model: caret method string (e.g. "glmnet", "xgbTree")
# ctrl:  caret::trainControl() object
# grid:  tuning grid forwarded to caret::train()
# seed:  RNG seed for reproducibility
# ...:   further arguments forwarded to caret::train()
#
# Returns a list with per-class metrics + AUC ("metrics"), the pROC roc
# object ("roc"), and the fitted caret model ("model").
fit <- function(title,
                X,
                y,
                model,
                ctrl = NULL,
                grid = NULL,
                seed = 123, ...) {
  set.seed(seed)
  # Combine predictors and outcome column-wise. merge() would join on the
  # (empty) set of common columns and produce a Cartesian product — use a
  # plain column bind instead.
  df <- data.frame(X, y = y)
  # Train the model on centred/scaled predictors
  mdl <- caret::train(df[, 1:ncol(X)], df$y,
    method = model,
    tuneGrid = grid,
    trControl = ctrl,
    metric = "ROC",
    preProcess = c("center", "scale"),
    ...
  )
  # Confusion matrix from the held-out CV predictions.
  # caret::confusionMatrix expects (data = predictions, reference = observed);
  # the original call had the two swapped.
  obs <- mdl$pred$obs
  preds <- mdl$pred$pred
  cm <- caret::confusionMatrix(preds, obs,
    dnn = c("X0", "X1"), # nolint
    positive = "X1"
  )
  # ROC curve on the class probabilities of the positive class (X1)
  ys <- as.numeric(obs) - 1
  yhats <- mdl$pred$X1
  # pROC uses conf.level (not ci.alpha) for the CI level
  roc <- pROC::roc(ys, yhats,
    levels = c(0, 1),
    ci = TRUE, boot.n = 1000, conf.level = 0.95
  )
  # Per-class metrics plus AUC, labelled with the model title
  metrics <- data.frame(c(cm$byClass, roc$auc),
    row.names = c(names(cm$byClass), "AUC")
  )
  names(metrics) <- title
  list("metrics" = metrics, "roc" = roc, "model" = mdl)
}
load("./data.Rdata")
# Predictors: metabolite columns for Probable AD cases only
X <- df[df$Diagnosis == "Probable AD", 1:230]
# Outcome: APOE E4 status
y <- df[df$Diagnosis == "Probable AD", "E4"]
y <- as.factor(y)
# caret requires syntactically valid factor level names (X0/X1)
levels(y) <- make.names(levels(y))
# Define the model training parameters, repeated 10-fold cross-validation
# (trailing comma after sampling was a syntax error and has been removed)
ctrl <- caret::trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 1,
  classProbs = TRUE,
  summaryFunction = caret::twoClassSummary,
  savePredictions = TRUE,
  sampling = "smote"
)
# Elastic-net logistic regression.
# NOTE(review): lambda 100 is listed twice — presumably one value was meant
# to differ (e.g. 1) — TODO confirm the intended grid.
lr <- fit(
  title = "Logistic Regression",
  X = X,
  y = y,
  model = "glmnet",
  ctrl = ctrl,
  grid = expand.grid(alpha = c(0, 1), lambda = c(0.001, 100, 100))
)
Attaching package: ‘recipes’
The following object is masked from ‘package:stats’:
step
Setting direction: controls < cases
Error: object 'mdl' not found
# Switch the CV summary to multi-class metrics for the 3-level APOE outcome
ctrl$summaryFunction <- caret::multiClassSummary
X <- df[df$Diagnosis == "Probable AD", 1:230]
# Outcome: full APOE genotype (3 classes)
y <- df[df$Diagnosis == "Probable AD", "APOE"]
y <- as.factor(y)
levels(y) <- make.names(levels(y))
# nnet provides the "multinom" method used below
library(nnet)
# Multinomial logistic regression.
# NOTE(review): multifit() is defined in a chunk not shown here — presumably
# a multi-class variant of fit(); confirm its interface.
mlr <- multifit(
  X = X,
  y = y,
  model = "multinom",
  ctrl = ctrl,
  trace = FALSE,
  tuneLength = 1
)
Setting direction: controls < cases
Setting direction: controls < cases
Confusion Matrix and Statistics
Reference
Prediction X0 X1 X2
X0 1214 1801 2065
X1 2896 1736 3115
X2 1338 1238 726
Overall Statistics
Accuracy : 0.2279
95% CI : (0.2215, 0.2345)
No Information Rate : 0.3662
P-Value [Acc > NIR] : 1
Kappa : -0.1414
Mcnemar's Test P-Value : <2e-16
Statistics by Class:
Class: X0 Class: X1 Class: X2
Sensitivity 0.22283 0.3636 0.12293
Specificity 0.63805 0.4706 0.74802
Pos Pred Value 0.23898 0.2241 0.21987
Neg Pred Value 0.61680 0.6374 0.59616
Prevalence 0.33778 0.2961 0.36617
Detection Rate 0.07527 0.1076 0.04501
Detection Prevalence 0.31496 0.4803 0.20472
Balanced Accuracy 0.43044 0.4171 0.43547
Confusion Matrix and Statistics
Reference
Prediction X0 X1 X2
X0 42 29 49
X1 57 56 70
X2 19 29 30
Overall Statistics
Accuracy : 0.336
95% CI : (0.2887, 0.3858)
No Information Rate : 0.3911
P-Value [Acc > NIR] : 0.9886
Kappa : 0.0216
Mcnemar's Test P-Value : 1.477e-08
Statistics by Class:
Class: X0 Class: X1 Class: X2
Sensitivity 0.3559 0.4912 0.20134
Specificity 0.7034 0.5243 0.79310
Pos Pred Value 0.3500 0.3060 0.38462
Neg Pred Value 0.7088 0.7071 0.60726
Prevalence 0.3097 0.2992 0.39108
Detection Rate 0.1102 0.1470 0.07874
Detection Prevalence 0.3150 0.4803 0.20472
Balanced Accuracy 0.5297 0.5078 0.49722
Sensitivity Specificity Pos Pred Value Neg Pred Value
0.13007017 0.75286798 0.23016354 0.60372651
Precision Recall F1 Prevalence
0.23016354 0.13007017 0.16621104 0.36226672
Detection Rate Detection Prevalence Balanced Accuracy
0.04712009 0.20472441 0.44146907
[,1] [,2] [,3]
Sensitivity 0.12292584 0.20134228 0.13007017
Specificity 0.74801917 0.79310345 0.75286798
Pos Pred Value 0.21986675 0.38461538 0.23016354
Neg Pred Value 0.59616434 0.60726073 0.60372651
Precision 0.21986675 0.38461538 0.23016354
Recall 0.12292584 0.20134228 0.13007017
F1 0.15768897 0.26431718 0.16621104
Prevalence 0.36617273 0.39107612 0.36226672
Detection Rate 0.04501209 0.07874016 0.04712009
Detection Prevalence 0.20472441 0.20472441 0.20472441
Balanced Accuracy 0.43547251 0.49722287 0.44146907
Something is wrong; all the logLoss metric values are missing:
logLoss AUC prAUC Accuracy Kappa Mean_F1 Mean_Sensitivity
Min. : NA Min. : NA Min. : NA Min. : NA Min. : NA Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA Median : NA Median : NA Median : NA Median : NA Median : NA
Mean :NaN Mean :NaN Mean :NaN Mean :NaN Mean :NaN Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA Max. : NA Max. : NA Max. : NA Max. : NA Max. : NA
NA's :1 NA's :1 NA's :1 NA's :1 NA's :1 NA's :1 NA's :1
Mean_Specificity Mean_Pos_Pred_Value Mean_Neg_Pred_Value Mean_Precision Mean_Recall
Min. : NA Min. : NA Min. : NA Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA Median : NA Median : NA Median : NA
Mean :NaN Mean :NaN Mean :NaN Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA Max. : NA Max. : NA Max. : NA
NA's :1 NA's :1 NA's :1 NA's :1 NA's :1
Mean_Detection_Rate Mean_Balanced_Accuracy
Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA
Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA
NA's :1 NA's :1
Error: Stopping
# Single fixed XGBoost hyperparameter set (no grid search)
param <- data.frame(
  nrounds = 10,
  max_depth = 2,
  eta = 0.3,
  gamma = 0,
  colsample_bytree = 1,
  min_child_weight = 1,
  subsample = 1
)
rff <- fit(
  title = "XGBoost",
  X = X,
  y = y,
  model = "xgbTree",
  ctrl = ctrl,
  grid = param
)
# Get a performance metrics table
# (lrf and treef are fitted in chunks not shown here)
metricsf <- cbind(lrf$metrics, treef$metrics, rff$metrics)
metricsf
# Plot ROC curves
rocsf <- list(lrf$roc, treef$roc, rff$roc)
# Generate labels: model name plus AUC (row 12 of the metrics table).
# The original referenced `metrics`, which only exists as a local inside
# fit(); the combined table built above is metricsf.
labels <- paste0(names(metricsf), ", AUC = ", round(metricsf[12, ], 2))
# httpgd::hgd()
ggroc(rocsf) +
  theme_clean() +
  scale_color_tableau(labels = labels)
Reference
Prediction X0 X1 X2
X0 60 32 28
X1 63 77 43
X2 24 28 26
Overall Statistics
Accuracy : 0.4278
95% CI : (0.3776, 0.4792)
No Information Rate : 0.3858
P-Value [Acc > NIR] : 0.052045
Kappa : 0.1246
Mcnemar's Test P-Value : 0.003516
Statistics by Class:
Class: X0 Class: X1 Class: X2
Sensitivity 0.4082 0.5620 0.26804
Specificity 0.7436 0.5656 0.81690
Pos Pred Value 0.5000 0.4208 0.33333
Neg Pred Value 0.6667 0.6970 0.76568
Prevalence 0.3858 0.3596 0.25459
Detection Rate 0.1575 0.2021 0.06824
Detection Prevalence 0.3150 0.4803 0.20472
Balanced Accuracy 0.5759 0.5638 0.54247
Setting direction: controls < cases
Setting direction: controls < cases
Setting direction: controls < cases
Confusion Matrix and Statistics
Reference
Prediction X0 X1 X2
X0 37 35 48
X1 56 58 69
X2 18 27 33
Overall Statistics
Accuracy : 0.336
95% CI : (0.2887, 0.3858)
No Information Rate : 0.3937
P-Value [Acc > NIR] : 0.9913
Kappa : 0.0182
Mcnemar's Test P-Value : 4.932e-08
Statistics by Class:
Class: X0 Class: X1 Class: X2
Sensitivity 0.33333 0.4833 0.22000
Specificity 0.69259 0.5211 0.80519
Pos Pred Value 0.30833 0.3169 0.42308
Neg Pred Value 0.71648 0.6869 0.61386
Prevalence 0.29134 0.3150 0.39370
Detection Rate 0.09711 0.1522 0.08661
Detection Prevalence 0.31496 0.4803 0.20472
Balanced Accuracy 0.51296 0.5022 0.51260
Setting direction: controls < cases
Setting direction: controls < cases
Setting direction: controls < cases
Confusion Matrix and Statistics
Reference
Prediction X0 X1 X2
X0 15 12 13
X1 19 24 18
X2 7 12 7
Overall Statistics
Accuracy : 0.3622
95% CI : (0.2788, 0.4522)
No Information Rate : 0.378
P-Value [Acc > NIR] : 0.6741
Kappa : 0.0271
Mcnemar's Test P-Value : 0.2052
Statistics by Class:
Class: X0 Class: X1 Class: X2
Sensitivity 0.3659 0.5000 0.18421
Specificity 0.7093 0.5316 0.78652
Pos Pred Value 0.3750 0.3934 0.26923
Neg Pred Value 0.7011 0.6364 0.69307
Prevalence 0.3228 0.3780 0.29921
Detection Rate 0.1181 0.1890 0.05512
Detection Prevalence 0.3150 0.4803 0.20472
Balanced Accuracy 0.5376 0.5158 0.48536
#Get a performance metrics table
# NOTE(review): pulls row 3 of each model's per-class metrics (cm$byClass) —
# presumably the third APOE class; confirm which row/class is intended.
# mlrf/mtreef/mrff are fitted in chunks not shown here.
multimetricsf <- cbind(mlrf$cm$byClass[3,], mtreef$cm$byClass[3,], mrff$cm$byClass[3,])
multimetricsf
#Plot ROC curves
mrocs <- list(mlrf$roc, mtreef$roc, mrff$roc)
# Generate labels
# NOTE(review): `metrics` is not defined at this scope (it is only a local
# inside fit()), so this line will fail or pick up a stale object; it likely
# should be built from multimetricsf — TODO confirm and fix.
labels <- paste0(names(metrics), ", AUC = ", paste(round(metrics[12, ], 2)))
# httpgd::hgd()
ggroc(mrocs) +
theme_clean() +
scale_color_tableau(labels = labels)